In [1]:
import sys, os
sys.path.insert(0, os.path.join("..", ".."))
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.collections
import numpy as np
import open_cp.network
import open_cp.network_hotspot
import open_cp.sources.chicago
import open_cp.geometry
This notebook is some musings on combinatorial explosion problems I am having.
A problem we run into is that our graph algorithms are:
Let's look more closely at the graphs. Suppose we "topologically reduce" the graph: delete any vertex which has just two neighbours. This does not change the search we need to perform when assigning risk, but it does decrease the number of edges and vertices in the graph. On second thoughts this didn't seem to help that much.
In [2]:
with open("Case study Chicago/input.graph", "rb") as f:
graph = open_cp.network.PlanarGraph.from_bytes(f.read())
In [3]:
reduced = open_cp.network.simple_reduce_graph(graph)
In [4]:
graph.number_edges, reduced.number_edges
Out[4]:
In [5]:
fig, axes = plt.subplots(ncols=2, figsize=(18,8))
for ax in axes:
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xcs, ycs = [], []
for k in reduced.vertices:
xcs.append(graph.vertices[k][0])
ycs.append(graph.vertices[k][1])
for ax in axes:
ax.scatter(xcs, ycs)
axes[0].set(xlim=[355000, 365000], ylim=[565000, 575000])
axes[1].set(xlim=[358000, 360000], ylim=[567000, 569000])
None
The underlying problem seems to be that there is just a combinatorial explosion in the number of paths to consider.
In [6]:
xmin, xmax = 359000, 359200
ymin, ymax = 568500, 568750
[k for k in reduced.vertices if graph.vertices[k][0] >= xmin and graph.vertices[k][0] <= xmax
and graph.vertices[k][1] >= ymin and graph.vertices[k][1] <= ymax]
Out[6]:
In [7]:
verts = [6743, 17940]
fig, ax = plt.subplots(figsize=(8,8))
lc = matplotlib.collections.LineCollection(graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xcs, ycs = [], []
for k in verts:
xcs.append(graph.vertices[k][0])
ycs.append(graph.vertices[k][1])
ax.scatter(xcs, ycs)
ax.set(xlim=[358000, 360000], ylim=[567500, 569500])
None
In [8]:
# Not too many paths between these two vertices...
len(list(graph.paths_between(6743, 17940, 1000)))
Out[8]:
In [9]:
# Explosion in the number of paths to consider for the KDE method...
lengths = [10,20,50,100,200,500,1000,1100,1200,1300]
num_paths = []
for length in lengths:
nup = len(list(graph.walk_with_degrees(6743, None, length, 10000000000)))
num_paths.append(nup)
plt.plot(lengths, num_paths)
Out[9]:
Let's look at an example: pick a nearby edge and see which paths end up crossing it (these are the paths we end up summing in the KDE method to get the final risk estimate).
In [10]:
print(graph.neighbourhood_edges(6743))
print(graph.edges[25201])
print(graph.neighbourhood_edges(13452))
edge = 25202
out = [ path for path in graph.walk_with_degrees(6743, None, 1300, 100000000)
if path[0] == edge ]
len(out)
Out[10]:
In [11]:
out.sort(key = lambda x : x[3])
out[:5], out[-5:]
Out[11]:
In [12]:
# Example kernel
kernel = open_cp.network_hotspot.TriangleKernel(1500)
out1 = [ kernel.integrate(start, end) / div
for _, start, end, div in out ]
cumulative_sum = [0]
for x in out1:
cumulative_sum.append(cumulative_sum[-1] + x)
cumulative_sum = np.asarray(cumulative_sum)
#plt.plot(cummulative_sum)
plt.plot(np.log(cumulative_sum[1:]))
Out[12]:
In [13]:
out[25], out[50]
Out[13]:
In [14]:
lengths = list(range(100,2100,100))
num_paths = []
for length in lengths:
nup = len(list(graph.walk_with_degrees(6743, None, length, 20000)))
num_paths.append(nup)
plt.plot(lengths, num_paths)
Out[14]:
In [15]:
import pickle, lzma
with lzma.open("Case study Chicago/input.pic.xz", "rb") as f:
timed_points = pickle.load(f)
trainer = open_cp.network_hotspot.Trainer()
trainer.graph = graph
trainer.maximum_edge_length = 20
trainer.data = timed_points
predictor = trainer.compile()
In [16]:
predictor.graph.number_edges, graph.number_edges
Out[16]:
In [17]:
graph.vertices[6743], predictor.graph.vertices[6743]
Out[17]:
In [18]:
lengths = list(range(100,2100,100))
num_paths = []
for length in lengths:
nup = len(list(predictor.graph.walk_with_degrees(6743, None, length, 20000)))
num_paths.append(nup)
plt.plot(lengths, num_paths)
Out[18]:
On the basis that Almost all programming can be viewed as an exercise in caching lets think a bit more.
However, we are still left considering all paths between edges, as the way to combine the length of the path, and the product of degrees, depends heavily on the (space) kernel is use.
In [19]:
fig, axes = plt.subplots(ncols=2, figsize=(19,8))
for ax in axes:
lc = matplotlib.collections.LineCollection(predictor.graph.as_lines(), color="black", linewidth=0.5)
ax.add_collection(lc)
xcs, ycs = [], []
xcs.append(graph.vertices[6743][0])
ycs.append(graph.vertices[6743][1])
ax.scatter(xcs, ycs)
(x1,y1),(x2,y2) = predictor.graph.as_lines()[8076]
ax.plot([x1,x2], [y1,y2], color="blue", linewidth=2)
axes[0].set(xlim=[358000, 360000], ylim=[567500, 569500])
axes[1].set(xlim=[358800, 359200], ylim=[568600, 569000])
None
In [20]:
predictor.graph.edges[8076]
Out[20]:
In [21]:
predictor.kernel = open_cp.network_hotspot.TriangleKernel(500)
risks = np.zeros(len(predictor.graph.edges))
predictor.add(risks, 8076, -1, 0.5, 1)
predictor.add(risks, 8076, 1, 0.5, 1)
# Remember to normalise!
risks /= predictor.graph.lengths
In [22]:
fig, ax = plt.subplots(figsize=(8,8))
lines = np.asarray(predictor.graph.as_lines())
mask = risks > 0
ri, li = risks[mask], lines[mask]
lc = matplotlib.collections.LineCollection(li, color="black", linewidth=5)
lc.set_array(ri)
lc.set(cmap="Blues")
ax.add_collection(lc)
x, y = graph.vertices[6743]
d = 700
ax.set(xlim=[x-d, x+d], ylim=[y-d, y+d])
None
Look at the street edge 8076 is on in more detail.
In [23]:
edges = []
edge = 8076
while True:
if edge in edges:
break
edges.append(edge)
for k in predictor.graph.edges[edge]:
nhood = list(predictor.graph.neighbourhood_edges(k))
nhood.remove(edge)
if len(nhood) == 1:
edge = nhood[0]
break
edges
Out[23]:
In [24]:
r = [risks[i] for i in edges]
fig, axes = plt.subplots(ncols=2, figsize=(16,5))
ax = axes[0]
ax.plot(r)
ax.set(title="Risk against edge")
ax = axes[1]
x = [0]
for i in edges:
x.append(x[-1] + predictor.graph.length(i))
x = np.asarray(x)
ax.plot((x[:-1]+x[1:])/2, r)
for i in range(len(x)-1):
ax.plot([x[i], x[i+1]], [r[i], r[i]], color="black")
ax.set(title="Risk against distance", xlabel="meters")
None
Conclusions:
In [25]:
dgraph = open_cp.network.to_derived_graph(predictor.graph, use_edge_indicies=True)
paths, prevs = open_cp.network.shortest_paths(dgraph, 8076)
assert min(paths.keys()) == 0
assert max(paths.keys()) == len(paths) - 1
paths = [paths[i] for i in range(len(paths))]
paths = np.asarray(paths)
In [26]:
disconnected = paths == -1
assert np.all(risks[disconnected] == 0)
risks, paths = risks[~disconnected], paths[~disconnected]
In [27]:
mask = risks > 0
np.max(paths[mask]), np.min(paths[~mask])
Out[27]:
In [28]:
fig, axes = plt.subplots(ncols=2, figsize=(19,8))
ax = axes[0]
ax.scatter(risks[mask], paths[mask], marker="x", linewidth=1, color="black")
ax.set(xlabel="Risk", ylabel="Distance", xlim=[-0.0001, 0.0021])
ax = axes[1]
ax.scatter(np.log(risks[mask]), paths[mask], marker="x", linewidth=1, color="black")
ax.set(xlabel="Log risk", ylabel="Distance")
None
In [29]:
risks = np.zeros(len(predictor.graph.edges))
predictor.add(risks, 8076, -1, 0.5, 1)
predictor.add(risks, 8076, 1, 0.5, 1)
risks /= predictor.graph.lengths
k1, k2 = predictor.graph.edges[8076]
paths1, prevs1 = open_cp.network.shortest_paths(predictor.graph, k1)
paths2, prevs2 = open_cp.network.shortest_paths(predictor.graph, k2)
paths1 = np.asarray([paths1[i] for i in range(len(paths1))])
paths2 = np.asarray([paths2[i] for i in range(len(paths2))])
paths, prevs = open_cp.network.shortest_paths(dgraph, 8076)
paths = np.asarray([paths[i] for i in range(len(paths))])
epaths, eprevs = open_cp.network.shortest_edge_paths(predictor.graph, 8076)
In [30]:
def shortest(edge_index):
k1, k2 = predictor.graph.edges[edge_index]
halves = (predictor.graph.length(edge_index) + predictor.graph.length(8076)) / 2
choices = [paths1[k1], paths1[k2], paths2[k1], paths2[k2]]
le = min(choices)
index = choices.index(le)
return le + halves, index
edge = 8010
k1, k2 = predictor.graph.edges[edge]
le = predictor.graph.length(edge) / 2
shortest(edge), paths[edge], min(epaths[k1]+le, epaths[k2]+le)
Out[30]:
In [31]:
def degree_of_shortest_path(edge_index):
k1, k2 = predictor.graph.edges[edge_index]
kk1, kk2 = predictor.graph.edges[8076]
_, choice = shortest(edge_index)
if choice == 0:
vertex, pa = k1, prevs1
elif choice == 1:
vertex, pa = k2, prevs1
elif choice == 2:
vertex, pa = k1, prevs2
elif choice == 3:
vertex, pa = k2, prevs2
else:
raise AssertionError
v = vertex
cum_deg = max(1, predictor.graph.degree(v) - 1)
while True:
vv = pa[v]
if v == vv:
break
v = vv
d = max(1, predictor.graph.degree(v) - 1)
cum_deg *= d
return cum_deg
degree_of_shortest_path(8010)
Out[31]:
In [32]:
adjusted_dists = np.empty_like(risks)
for i, r in enumerate(risks):
if r == 0:
continue
adjusted_dists[i], _ = shortest(i)
adjusted_dists[i] = predictor.kernel(adjusted_dists[i])
adjusted_dists[i] = adjusted_dists[i] / degree_of_shortest_path(i)
mask = (risks > 0)
r = risks[mask]
adjusted_dists = adjusted_dists[mask]
In [33]:
fig, axes = plt.subplots(ncols=2, figsize=(16,8))
ax = axes[0]
ax.scatter(r, adjusted_dists)
ax.set(xlabel="Risk", ylabel="From distance")
xmin, xmax = 0, max(r)
xd = (xmax - xmin) / 10
ymin, ymax = 0, max(adjusted_dists)
yd = (ymax - ymin) / 10
ax.set(xlim=[xmin-xd, xmax+xd], ylim=[ymin-yd, ymax+yd])
x = np.linspace(0, 0.002, 2)
ax.plot(x, x, color="red")
ax = axes[1]
mask = (r>0) & (adjusted_dists>0)
ax.scatter(np.log(r[mask]), np.log(adjusted_dists[mask]))
ax.set(xlabel="Log risk", ylabel="Log from distance")
None
In [34]:
pred = open_cp.network_hotspot.ApproxPredictor(predictor)
risks_approx = np.zeros(len(predictor.graph.edges))
pred.add_edge(risks_approx, 8076, 0.5, 1)
risks_approx /= predictor.graph.lengths
In [35]:
fig, axes = plt.subplots(ncols=2, figsize=(16,8))
ax = axes[0]
ax.scatter(risks, risks_approx)
ax.set(xlabel="Risk", ylabel="Approx risk")
xmin, xmax = 0, max(r)
xd = (xmax - xmin) / 10
ymin, ymax = 0, max(adjusted_dists)
yd = (ymax - ymin) / 10
ax.set(xlim=[xmin-xd, xmax+xd], ylim=[ymin-yd, ymax+yd])
x = np.linspace(0, 0.002, 2)
ax.plot(x, x, color="red")
ax = axes[1]
mask = (risks>0) & (risks_approx>0)
ax.scatter(np.log(risks[mask]), np.log(risks_approx[mask]))
ax.set(xlabel="Log risk", ylabel="Log from distance")
None
In [ ]: